import json
import time
import sys

from config import getConfigParams
from requestUrl import getDocByUrl,getEntryIdByUrl
from formatContent import formatContent
from logs import saveLog

# Script flow: read the run configuration from the command line, fetch the
# article page, extract its fields, log the outcome and print it as JSON.

# Load configuration parameters from the command-line arguments.
configParams = getConfigParams(sys.argv)
url = configParams['real_url']
env = configParams['env']

# Resolve the article (entry) ID from the base URL.
entryId = getEntryIdByUrl(configParams['base_url'])

# Fetch the HTML document; per the original note this is a "pq" object
# (presumably a PyQuery instance -- confirm against requestUrl.getDocByUrl).
doc = getDocByUrl(url)

# Title.
title = doc('.postTitle').text()

# Article body node.
articleContent = doc('#cnblogs_post_body')

# Serialize the body once; the same markup is used both as the raw
# ('not_format_content') value and as the input to formatContent.
rawHtml = articleContent.html()

# Author name: text of the first link inside '.postDesc'.
# .eq(0) yields an empty selection (whose text is '') when no link matches,
# whereas indexing with [0] raised IndexError and aborted the whole script.
authorName = doc('.postDesc a').eq(0).text()

# Publication time as displayed on the page.
# NOTE(review): this value is extracted but never used -- 'public_time' below
# is hard-coded to 0. Confirm whether it should be parsed into a timestamp.
publicTime = doc('#post-date').text()

# Filter/clean the tags through formatContent (the original note attributes
# this filtering step to 李世泉 / Li Shiquan).
content = formatContent(rawHtml, env=env)

# Result payload.
result = {
    'entry_id': entryId,
    'not_format_content': rawHtml,
    'content': content,
    'author_name': authorName,
    'cover_url': '',
    'public_time': 0,
    'description': '',
    'title': title,
}

# Log the invocation arguments together with the produced result.
logData = {
    'params': sys.argv,
    'result': result,
}
saveLog(json.dumps(logData, ensure_ascii=False))

# Emit the result on stdout; ensure_ascii=False keeps non-ASCII text readable.
print(json.dumps(result, ensure_ascii=False))
